import requests
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from lxml import etree

# HTTP request headers passed to requests.get() when fetching the listing
# pages. NOTE(review): the cookie and user-agent values look like they were
# copied from a browser session and may go stale — confirm they are still
# required by the site.
headers = {
    "accept-language": "zh-CN,zh;q=0.9",
    "cache-control": "max-age=0",
    "cookie": (
        "Hm_lvt_c3acb27768b401b6598a1ae2797371a4=1693548649,1693581545; "
        "Hm_lpvt_c3acb27768b401b6598a1ae2797371a4=1693581548"
    ),
    "sec-fetch-dest": "document",
    "sec-fetch-mode": "navigate",
    "sec-fetch-site": "same-origin",
    "sec-fetch-user": "?1",
    "upgrade-insecure-requests": "1",
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/84.0.4147.89 Safari/537.36 "
        "SLBrowser/7.0.0.6251 SLBChan/124"
    ),
}
# Accumulators filled by the two scraping passes and read by the final
# print loop: list0 holds cleaned profile text nodes, list_img holds image URLs.
list0 = []
list_img = []

# Fetch the teachers' profile text from listing pages 1-4.
for page_number in range(1, 5):
    url = ("https://teacher.bupt.edu.cn/pyjslb.jsp?totalpage=4&PAGENUM="
           f"{page_number}"
           "&urltype=tsites.PinYinTeacherList&wbtreeid=1001&py=q&lang=zh_CN")
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error rather than parsing an error page, which
    # would silently yield no (or wrong) text nodes for this page.
    response.raise_for_status()
    # The raw bytes are parsed, so lxml works out the document encoding
    # itself; setting response.encoding would have no effect here.
    tree = etree.HTML(response.content)
    # Every text node inside each teacher's text box.
    teacher_information_list = tree.xpath('//div[@class = "college-list2"]//'
                                          'div[@class = "teacherTxt2"]//text()')
    for information in teacher_information_list:
        # Strip spaces and the "所在单位：" label; str.replace is already a
        # no-op when the substring is absent, so no membership test is needed.
        information = information.replace(" ", "").replace("所在单位：", "")
        # Nodes without "@" get a trailing comma as a field separator; a node
        # containing "@" (presumably the e-mail address) is left as-is.
        if "@" not in information:
            information += ","
        list0.append(information)

# Fetch the teachers' image URLs from listing pages 1-4 with a real browser.
# A single Edge session is reused for every page instead of launching a new
# browser per page, and try/finally guarantees the browser is shut down even
# if a page load or element lookup raises.
driver = webdriver.Edge()
try:
    for page_number in range(1, 5):
        url = ("https://teacher.bupt.edu.cn/pyjslb.jsp?totalpage=4&PAGENUM="
               f"{page_number}"
               "&urltype=tsites.PinYinTeacherList&wbtreeid=1001&py=q&lang=zh_CN")
        driver.get(url)
        # Fixed pause after navigation — presumably to let the page finish
        # populating the <img> elements before they are read.
        time.sleep(2)
        img_list = driver.find_elements(By.XPATH, '//div[@class = "college-list2"]//'
                                                  'div[@class = "teacherPic2"]//img')
        # Record the src attribute of every image, in page order.
        list_img.extend(img.get_attribute('src') for img in img_list)
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    driver.quit()
# Test output: walk the collected text nodes and print them, attaching the
# next image URL from list_img to each record.
BLANK_FIELDS = ("\n\n\n,", "\n,", ",")
image_index = 0

for field in list0:
    if field == "\n\n,":
        # This whitespace-only node is printed as the placeholder "暂无"
        # followed by the next image URL, which ends the line.
        picture = list_img[image_index]
        image_index += 1
        print("暂无," + picture)
        field = ""
    elif field in BLANK_FIELDS:
        # Other separator-only nodes contribute nothing to the output.
        field = ""

    print(field, end="")

    if "@" in field:
        # A node containing "@" closes the record: append the next image URL
        # and terminate the line.
        picture = list_img[image_index]
        image_index += 1
        print("," + picture)

